pacman::p_load(sf, spdep, GWmodel, SpatialML,
tmap, rsample, Metrics, tidyverse,
knitr, kableExtra, jsonlite)Take Home Exercise 3
Part 1 : Data And Packages
Packages
Data
We can get the lat long data from the previous output
location_data <- read_rds("data/processed_data/coords.rds")
resale_data <- read_csv("data/resale.csv")Rows: 192970 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): month, town, flat_type, block, street_name, storey_range, flat_mode...
dbl (3): floor_area_sqm, lease_commence_date, resale_price
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
resale_tidy <- resale_data %>%
mutate(address = paste(block,street_name)) %>%
mutate(remaining_lease_yr = as.integer(
str_sub(remaining_lease, 0, 2)))%>%
mutate(remaining_lease_mth = as.integer(
str_sub(remaining_lease, 9, 11)))We also got data from the other required decision parameters, which are :
Structural factors
- Area of the unit
- Floor level Remaining
- lease
- Age of the unit
Locational factors
- Proxomity to CBD
- Proximity to eldercare
- Proximity to foodcourt/hawker centres
- Proximity to MRT
- Proximity to park
- Proximity to good primary school ( All schools are good schools lol)
- Proximity to shopping mall
- Proximity to supermarket
- Numbers of kindergartens within 350m
- Numbers of childcare centres within 350m
- Numbers of bus stop within 350m
- Numbers of primary school within 1km
CBD_lat_long <- c(1.287953, 103.851784) # Taken from https://www.latlong.net/place/downtown-core-singapore-20616.html
CBD_svy21 <- st_sfc(st_point(c(103.851784, 1.287953)),
crs = 4326) %>%
st_transform(3414)
eldercare_data <- st_read(dsn = "data/EldercareServicesSHP",
layer = "ELDERCARE") %>% st_transform(3414)Reading layer `ELDERCARE' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\EldercareServicesSHP'
using driver `ESRI Shapefile'
Simple feature collection with 133 features and 18 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 14481.92 ymin: 28218.43 xmax: 41665.14 ymax: 46804.9
Projected CRS: SVY21
foodcourt_data <- st_read("data/HawkerCentresGEOJSON.geojson") %>% st_transform(3414)Reading layer `HawkerCentresGEOJSON' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\HawkerCentresGEOJSON.geojson'
using driver `GeoJSON'
Simple feature collection with 125 features and 2 fields
Geometry type: POINT
Dimension: XYZ
Bounding box: xmin: 103.6974 ymin: 1.272716 xmax: 103.9882 ymax: 1.449017
z_range: zmin: 0 zmax: 0
Geodetic CRS: WGS 84
MRT_data <- st_read("data/LTAMRTStationExitGEOJSON.geojson") %>% st_transform(3414)Reading layer `LTAMRTStationExitGEOJSON' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\LTAMRTStationExitGEOJSON.geojson'
using driver `GeoJSON'
Simple feature collection with 563 features and 2 fields
Geometry type: POINT
Dimension: XYZ
Bounding box: xmin: 103.6368 ymin: 1.264972 xmax: 103.9893 ymax: 1.449157
z_range: zmin: 0 zmax: 0
Geodetic CRS: WGS 84
park_data <- st_read("data/Parks.kml") %>% st_transform(3414)Reading layer `NATIONALPARKS' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\Parks.kml'
using driver `KML'
Simple feature collection with 430 features and 2 fields
Geometry type: POINT
Dimension: XYZ
Bounding box: xmin: 103.6929 ymin: 1.214491 xmax: 104.0538 ymax: 1.462094
z_range: zmin: 0 zmax: 0
Geodetic CRS: WGS 84
primarySchool_data <- st_read("data/LTASchoolZone.geojson") %>% st_transform(3414)Reading layer `LTASchoolZone' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\LTASchoolZone.geojson'
using driver `GeoJSON'
Simple feature collection with 211 features and 2 fields
Geometry type: MULTIPOLYGON
Dimension: XY, XYZ
Bounding box: xmin: 103.687 ymin: 1.272736 xmax: 103.9668 ymax: 1.457587
z_range: zmin: 0 zmax: 0
Geodetic CRS: WGS 84
Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
GDAL Message 1: Sub-geometry 0 has coordinate dimension 2, but container has 3
Warning in CPL_transform(x, crs, aoi, pipeline, reverse, desired_accuracy, :
GDAL Message 1: Sub-geometry 1 has coordinate dimension 2, but container has 3
mall_data <- st_read(dsn = "data/MP14SDCPPWPLANMallandPromenadeSHP",
layer="G_MP14_PKWB_MALL_PROM_PL") %>% st_transform(3414)Reading layer `G_MP14_PKWB_MALL_PROM_PL' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\MP14SDCPPWPLANMallandPromenadeSHP'
using driver `ESRI Shapefile'
Simple feature collection with 464 features and 8 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 15576.2 ymin: 24936 xmax: 40537.72 ymax: 48239.39
Projected CRS: SVY21
supermarket_data <- st_read("data/SupermarketsGEOJSON.geojson") %>% st_transform(3414)Reading layer `SupermarketsGEOJSON' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\SupermarketsGEOJSON.geojson'
using driver `GeoJSON'
Simple feature collection with 526 features and 2 fields
Geometry type: POINT
Dimension: XYZ
Bounding box: xmin: 103.6258 ymin: 1.24715 xmax: 104.0036 ymax: 1.461526
z_range: zmin: 0 zmax: 0
Geodetic CRS: WGS 84
kindergarten_data <- st_read("data/Kindergartens.geojson") %>% st_transform(3414)Reading layer `Kindergartens' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\Kindergartens.geojson'
using driver `GeoJSON'
Simple feature collection with 448 features and 2 fields
Geometry type: POINT
Dimension: XYZ
Bounding box: xmin: 103.6887 ymin: 1.247759 xmax: 103.9717 ymax: 1.455452
z_range: zmin: 0 zmax: 0
Geodetic CRS: WGS 84
childcare_data <- st_read("data/ChildCareServices.geojson") %>% st_transform(3414)Reading layer `ChildCareServices' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\ChildCareServices.geojson'
using driver `GeoJSON'
Simple feature collection with 1925 features and 2 fields
Geometry type: POINT
Dimension: XYZ
Bounding box: xmin: 103.6878 ymin: 1.247759 xmax: 103.9897 ymax: 1.462134
z_range: zmin: 0 zmax: 0
Geodetic CRS: WGS 84
busstop_data <- st_read(dsn = "data/BusStopLocation_Jul2024",
layer= "BusStop") %>% st_transform(3414)Reading layer `BusStop' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\BusStopLocation_Jul2024'
using driver `ESRI Shapefile'
Simple feature collection with 5166 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3970.122 ymin: 26482.1 xmax: 48285.52 ymax: 52983.82
Projected CRS: SVY21
Part 2 : Processing the data
Now we need to add all the data to the dataframe. We will also jitter the geomoetries of dependent variables just in case they overlap.
Locations of HDB
We can use the location data and join by postal code
resale_tidy_loc <- left_join(resale_tidy, location_data, by = "address")
resale_tidy_clean <- resale_tidy_loc %>%
filter(!is.na(postal))
# Then we convert the lat long into SVY21
resale_sf <- st_as_sf(resale_tidy_clean,
coords = c("longitude", "latitude"),
crs = 4326) %>%
st_transform(3414)
# Handle the NA in the lease months
resale_sf$remaining_lease_mth[is.na(resale_sf$remaining_lease_mth)] <- 0We also need to jitter the points so that the points do not share the same coordinates, we need to jitter quite abit for the regression to work latter.
resale_sf$geometry <- st_jitter(resale_sf$geometry, amount = 2)We can now show the data
tmap_mode("plot")tmap mode set to plotting
tm_shape(resale_sf) +
tm_dots()
Unit Age
resale_sf$unit_age <- 99 - resale_sf$remaining_lease_yrProximity to CBD
We can compare all locations to the single CBD coordinate and output a distance
distance_matrix <- st_distance(resale_sf$geometry, CBD_svy21)
resale_sf$PROX_CBD <- apply(distance_matrix, 1, min)Proximity to Eldercare
In this case we can find the closest elder-care to the HDB unit
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(eldercare_data$geometry, amount = 2))
resale_sf$PROX_ELDER <- apply(distance_matrix, 1, min)Proximity to Hawker Center
We do the same thing here
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(st_zm(foodcourt_data$geometry), amount = 2))
resale_sf$PROX_HAWKER <- apply(distance_matrix, 1, min)Proximity to MRT
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(st_zm(MRT_data$geometry), amount = 2))
resale_sf$PROX_MRT <- apply(distance_matrix, 1, min)Proximity to Park
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(st_zm(park_data$geometry), amount = 2))
resale_sf$PROX_PARK <- apply(distance_matrix, 1, min)Proximity to Primary School
We need to get the center of the primary school to compare against centroid. Need to drop the z value
primarySchool_data$geometry <- st_zm(primarySchool_data$geometry)
primarySchool_data$centroid <- st_centroid(primarySchool_data$geometry)
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(primarySchool_data$centroid, amount = 2))
resale_sf$PROX_PRIM <- apply(distance_matrix, 1, min)Proximity to Shopping Mall
Same for the shopping mall
mall_data$centroid <- st_centroid(mall_data$geometry)
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(mall_data$centroid, amount = 2))
resale_sf$PROX_MALL <- apply(distance_matrix, 1, min)Proximity to Supermarket
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(st_zm(supermarket_data$geometry), amount = 2))
resale_sf$PROX_SPMK <- apply(distance_matrix, 1, min)Number of Kindergartens within 350m
To calculate the number of kindergartens within 350m of the HBD, we need to have a 350m search radius around each location, then count the number of kindergartens within
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(st_zm(kindergarten_data$geometry), amount = 2))
count_within_350m <- apply(distance_matrix, 1, function(distances) {
sum(distances <= 350) # Count points within 350 meters
})
resale_sf$KIND_350 <- count_within_350mNumber of Childcares within 350m
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(st_zm(childcare_data$geometry), amount = 2))
count_within_350m <- apply(distance_matrix, 1, function(distances) {
sum(distances <= 350) # Count points within 350 meters
})
resale_sf$CHILD_350 <- count_within_350mNumber of Bus-Stops within 350m
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(busstop_data$geometry, amount = 2))
count_within_350m <- apply(distance_matrix, 1, function(distances) {
sum(distances <= 350) # Count points within 350 meters
})
resale_sf$BUS_350 <- count_within_350mNumber of Primary School within 1000m
distance_matrix <- st_distance(resale_sf$geometry, st_jitter(primarySchool_data$centroid, amount = 2))
count_within_1km <- apply(distance_matrix, 1, function(distances) {
sum(distances <= 1000) # Count points within 350 meters
})
resale_sf$PRI_1K <- count_within_1kmSaving the data
Now we can save the data for future purposes.
write_rds(resale_sf, "data/resale_sf_processed.rds")Part 3 : Shrinking the search space
Read the data
cleaned_resale_sf <- read_rds("data/resale_sf_processed.rds")
cleaned_resale_no_geom <- cleaned_resale_sf %>% st_drop_geometry()Because there is too much data, we will need to reduce the size of inspection. First we shall determine the types of flats available.
unique_flat_types <- unique(cleaned_resale_sf$flat_type)
unique_flat_types[1] "3 ROOM" "4 ROOM" "5 ROOM" "EXECUTIVE" "2 ROOM"
We also want to see the types of flats that are available
unique_flat_models <- unique(cleaned_resale_sf$flat_model)
unique_flat_models [1] "New Generation" "DBSS" "Improved"
[4] "Apartment" "Simplified" "Model A"
[7] "Model A-Maisonette" "Maisonette" "Standard"
[10] "Premium Apartment" "Type S1" "Model A2"
[13] "Type S2" "Adjoined flat" "Premium Apartment Loft"
[16] "2-room" "Premium Maisonette" "3Gen"
To have more focus on the data, we shall focus on the more expensive flat models vs the exercise given to us. To get an idea of what is expensive, we will need to see the spread of flat prices by their specific prices. We can get the mean price of each house type
mean_prices <- cleaned_resale_sf %>%
group_by(flat_model) %>%
summarise(mean_price = mean(resale_price, na.rm = TRUE))
print(mean_prices)Simple feature collection with 18 features and 2 fields
Geometry type: GEOMETRY
Dimension: XY
Bounding box: xmin: 11595.97 ymin: 28155.58 xmax: 42443.05 ymax: 48684.98
Projected CRS: SVY21 / Singapore TM
# A tibble: 18 × 3
flat_model mean_price geometry
<chr> <dbl> <GEOMETRY [m]>
1 2-room 344621. MULTIPOINT ((14056.05 35915.64), (15371.24…
2 3Gen 733630. MULTIPOINT ((14420.01 36876.04), (14420.72…
3 Adjoined flat 738722. MULTIPOINT ((14361.26 36505.09), (14743.75…
4 Apartment 703122. MULTIPOINT ((11635.52 36024.59), (11636.14…
5 DBSS 744929. MULTIPOINT ((15809.4 34517.19), (15809.56 …
6 Improved 510734. MULTIPOINT ((11754.26 35787.46), (11754.27…
7 Maisonette 749470. MULTIPOINT ((11635.19 36025.09), (11635.3 …
8 Model A 526007. MULTIPOINT ((11595.97 35820.57), (11596.36…
9 Model A-Maisonette 721084. MULTIPOINT ((15672.34 37432.88), (15672.45…
10 Model A2 406797. MULTIPOINT ((12884.62 35297.85), (12884.68…
11 New Generation 370598. MULTIPOINT ((14868.71 36801.51), (14868.85…
12 Premium Apartment 570944. MULTIPOINT ((12548.57 35413.26), (12548.8 …
13 Premium Apartment Loft 997578. MULTIPOINT ((35118.82 42847.07), (35119.06…
14 Premium Maisonette 808000 POINT (41890.38 38260.62)
15 Simplified 384347. MULTIPOINT ((18337.99 38162.01), (18338.6 …
16 Standard 352766. MULTIPOINT ((14905.22 36255.08), (14905.79…
17 Type S1 1016522. MULTIPOINT ((28849.11 28922.74), (28849.16…
18 Type S2 1130472. MULTIPOINT ((28849.21 28921.3), (28849.54 …
We can also see the mean prices by flat type
mean_prices <- cleaned_resale_sf %>%
group_by(flat_type) %>%
summarise(mean_price = mean(resale_price, na.rm = TRUE))
print(mean_prices)Simple feature collection with 5 features and 2 fields
Geometry type: MULTIPOINT
Dimension: XY
Bounding box: xmin: 11595.97 ymin: 28155.58 xmax: 42443.05 ymax: 48684.98
Projected CRS: SVY21 / Singapore TM
# A tibble: 5 × 3
flat_type mean_price geometry
<chr> <dbl> <MULTIPOINT [m]>
1 2 ROOM 297474. ((14055.55 35918.35), (14055.84 35916.08), (14056.05 359…
2 3 ROOM 369544. ((11595.97 35820.57), (11596.36 35822.87), (11596.7 3582…
3 4 ROOM 548271. ((11596.51 35820.84), (11596.87 35821.5), (11596.92 3582…
4 5 ROOM 630283. ((11754.26 35787.46), (11754.27 35788.86), (11754.96 357…
5 EXECUTIVE 709138. ((11635.19 36025.09), (11635.3 36024.8), (11635.52 36024…
Since the class exercise did the model for 2 and 3 room flat, we shall try to do a housing market that is higher in value which is the executive flats.
cleaned_resale_sf_cut <- cleaned_resale_sf %>% filter(flat_type %in% c('EXECUTIVE'))
# filter(flat_model %in% c('DBSS',
# 'Maisonette',
# '3Gen',
# 'Adjoined flat',
# 'Model A-Maisonette',
# 'Apartment'))
cleaned_resale_sf_cut_no_geom <- cleaned_resale_sf_cut %>%
st_drop_geometry()This leaves us with over 10000 data samples, which should be enough for us.
Part 4 : Computing Correlation Matrix
Bin the data
We need to bin some of the variables so that they make integers
Storeys
unqiue_storey <- unique(cleaned_resale_sf_cut_no_geom$storey_range)
unqiue_storey[1] "07 TO 09" "01 TO 03" "13 TO 15" "10 TO 12" "04 TO 06" "16 TO 18" "19 TO 21"
[8] "22 TO 24"
We need to map it by height, where a low value corresponds to a low height
# We need to arrange the the mapping by height
calculate_height <- function(range) {
# Split the range to get the lower and upper bounds
bounds <- as.numeric(unlist(strsplit(range, " TO ")))
avg_height <- mean(bounds)
return(avg_height)
}
heights <- sapply(unqiue_storey, calculate_height)
# Make the mapping
height_mapping <- setNames(sapply(unqiue_storey, calculate_height), unqiue_storey)We then apply this mapping to the dataframe
cleaned_resale_sf_cut_no_geom$storey_range_bin <- height_mapping[cleaned_resale_sf_cut_no_geom$storey_range]
cleaned_resale_sf_cut$storey_range_bin <- height_mapping[cleaned_resale_sf_cut$storey_range]Plotting the graph
We are not sure if all the variables are correlated, so we can build a correlation matrix to see if we need to exclude any variables
required_cols <- c(7, 9, 13, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)
corrplot::corrplot(cor(cleaned_resale_sf_cut_no_geom[, required_cols]),
diag = FALSE,
order = "AOE",
tl.pos = "td",
tl.cex = 0.5,
method = "number",
type = "upper")
We can see that lease commence date and remain lease yr have high correlation, so we remove lease commence date.
Variance Inflation Factor
We also can check the Variance Inflation Factor to see if there are any variables above a 5
Train-Test Split
First we need to do a train test split for any model training. Split shall be 65 - 35.
set.seed(1234)
# First we try to remove any NA values
cleaned_resale_sf_cut <- cleaned_resale_sf_cut[rowSums(is.na(st_drop_geometry(cleaned_resale_sf_cut))) == 0,, ]
resale_split <- initial_split(cleaned_resale_sf_cut,
prop = 6.5/10,)
train_data <- training(resale_split)We need to check for overlaps in the train data to see if there is data we need to remove.
overlap_matrix <- st_equals(train_data$geometry)
overlap_matrix <- sapply(overlap_matrix, function(x) length(x) > 1)
any_overlap <- any(overlap_matrix)
any_overlap[1] FALSE
Generating simple LM Model
price_mlr <- lm(resale_price ~ floor_area_sqm + storey_range_bin + remaining_lease_yr +
PROX_CBD + PROX_ELDER + PROX_HAWKER +
PROX_MRT + PROX_PARK + PROX_MALL +
PROX_SPMK + KIND_350 +
CHILD_350 + BUS_350 +
PRI_1K,
data=train_data)vif <- performance::check_collinearity(price_mlr)
kable(vif,
caption = "Variance Inflation Factor (VIF) Results") %>%
kable_styling(font_size = 18) | Term | VIF | VIF_CI_low | VIF_CI_high | SE_factor | Tolerance | Tolerance_CI_low | Tolerance_CI_high |
|---|---|---|---|---|---|---|---|
| floor_area_sqm | 1.623017 | 1.519949 | 1.746517 | 1.273977 | 0.6161365 | 0.5725683 | 0.6579169 |
| storey_range_bin | 1.069751 | 1.030736 | 1.158291 | 1.034288 | 0.9347970 | 0.8633407 | 0.9701808 |
| remaining_lease_yr | 2.066752 | 1.920597 | 2.236111 | 1.437620 | 0.4838509 | 0.4472050 | 0.5206713 |
| PROX_CBD | 1.496439 | 1.405892 | 1.607185 | 1.223290 | 0.6682533 | 0.6222059 | 0.7112923 |
| PROX_ELDER | 1.437179 | 1.352578 | 1.542080 | 1.198824 | 0.6958076 | 0.6484749 | 0.7393288 |
| PROX_HAWKER | 1.157005 | 1.103177 | 1.238915 | 1.075642 | 0.8643005 | 0.8071580 | 0.9064726 |
| PROX_MRT | 1.326985 | 1.253705 | 1.421429 | 1.151948 | 0.7535882 | 0.7035172 | 0.7976355 |
| PROX_PARK | 1.366267 | 1.288899 | 1.464354 | 1.168874 | 0.7319216 | 0.6828950 | 0.7758562 |
| PROX_MALL | 1.185892 | 1.128379 | 1.269170 | 1.088987 | 0.8432472 | 0.7879162 | 0.8862271 |
| PROX_SPMK | 1.328907 | 1.255426 | 1.423527 | 1.152782 | 0.7524982 | 0.7024806 | 0.7965425 |
| KIND_350 | 1.255049 | 1.189511 | 1.343252 | 1.120290 | 0.7967815 | 0.7444620 | 0.8406815 |
| CHILD_350 | 1.456864 | 1.370280 | 1.563694 | 1.207006 | 0.6864058 | 0.6395112 | 0.7297779 |
| BUS_350 | 1.299222 | 1.228883 | 1.391176 | 1.139834 | 0.7696916 | 0.7188163 | 0.8137471 |
| PRI_1K | 1.506553 | 1.414998 | 1.618307 | 1.227417 | 0.6637669 | 0.6179298 | 0.7067149 |
We can also plot this out for better visualization
plot(vif) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Variable `Component` is not in your data frame :/

Part 5 : Generating Geographically Weighted Predictive Models
Convert to Spatial Datframe
# First we check for NA values in the traindata
train_data_sp <- as_Spatial(train_data)Get adaptive bandwidth
bw_adaptive <- bw.gwr(resale_price ~ floor_area_sqm +
storey_range_bin + remaining_lease_yr +
PROX_CBD + PROX_ELDER + PROX_HAWKER +
PROX_MRT + PROX_PARK + PROX_MALL +
PROX_SPMK + KIND_350 +
CHILD_350 + BUS_350 +
PRI_1K,
data=train_data_sp,
approach="CV",
kernel="gaussian",
adaptive=TRUE,
longlat=FALSE)Adaptive bandwidth: 929 CV score: 1.804659e+13
Adaptive bandwidth: 582 CV score: 1.716896e+13
Adaptive bandwidth: 367 CV score: 1.585665e+13
Adaptive bandwidth: 234 CV score: 1.456485e+13
Adaptive bandwidth: 152 CV score: 1.28309e+13
Adaptive bandwidth: 101 CV score: 1.097677e+13
Adaptive bandwidth: 70 CV score: 9.419643e+12
Adaptive bandwidth: 50 CV score: 8.525162e+12
Adaptive bandwidth: 38 CV score: 3.452569e+15
Adaptive bandwidth: 57 CV score: 8.714993e+12
Adaptive bandwidth: 45 CV score: 8.355569e+12
Adaptive bandwidth: 42 CV score: 9.069358e+12
Adaptive bandwidth: 47 CV score: 8.383025e+12
Adaptive bandwidth: 44 CV score: 8.26943e+12
Adaptive bandwidth: 43 CV score: 8.183484e+12
Adaptive bandwidth: 42 CV score: 9.069358e+12
Adaptive bandwidth: 43 CV score: 8.183484e+12
We will also save it for the future
write_rds(bw_adaptive, "data/model/bw_adaptive.rds")bw_adaptive <- read_rds("data/model/bw_adaptive.rds")Make the model
Now we will make the adaptive GWR model
gwr_adaptive <- gwr.basic(formula = resale_price ~ floor_area_sqm +
storey_range_bin + remaining_lease_yr +
PROX_CBD + PROX_ELDER + PROX_HAWKER +
PROX_MRT + PROX_PARK + PROX_MALL +
PROX_SPMK + KIND_350 +
CHILD_350 + BUS_350 +
PRI_1K,
data=train_data_sp,
bw=bw_adaptive,
kernel = 'gaussian',
adaptive=TRUE,
longlat = FALSE)Save a copy
write_rds(gwr_adaptive, "data/model/gwr_adaptive.rds")Reading the model
gwr_adaptive <- read_rds("data/model/gwr_adaptive.rds")gwr_adaptive ***********************************************************************
* Package GWmodel *
***********************************************************************
Program starts at: 2024-11-07 23:15:39.156384
Call:
gwr.basic(formula = resale_price ~ floor_area_sqm + storey_range_bin +
remaining_lease_yr + PROX_CBD + PROX_ELDER + PROX_HAWKER +
PROX_MRT + PROX_PARK + PROX_MALL + PROX_SPMK + KIND_350 +
CHILD_350 + BUS_350 + PRI_1K, data = train_data_sp, bw = bw_adaptive,
kernel = "gaussian", adaptive = TRUE, longlat = FALSE)
Dependent (y) variable: resale_price
Independent variables: floor_area_sqm storey_range_bin remaining_lease_yr PROX_CBD PROX_ELDER PROX_HAWKER PROX_MRT PROX_PARK PROX_MALL PROX_SPMK KIND_350 CHILD_350 BUS_350 PRI_1K
Number of data points: 1491
***********************************************************************
* Results of Global Regression *
***********************************************************************
Call:
lm(formula = formula, data = data)
Residuals:
Min 1Q Median 3Q Max
-316379 -82171 -15256 77866 543656
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.700e+05 7.840e+04 12.372 < 2e-16 ***
floor_area_sqm 3.192e+03 3.473e+02 9.193 < 2e-16 ***
storey_range_bin 6.406e+03 7.527e+02 8.510 < 2e-16 ***
remaining_lease_yr -7.663e+03 5.912e+02 -12.962 < 2e-16 ***
PROX_CBD -1.338e+01 1.079e+00 -12.404 < 2e-16 ***
PROX_ELDER -9.347e+00 5.231e+00 -1.787 0.074142 .
PROX_HAWKER -1.286e+01 6.945e+00 -1.852 0.064268 .
PROX_MRT -4.284e+01 8.096e+00 -5.291 1.4e-07 ***
PROX_PARK -9.348e-01 7.995e+00 -0.117 0.906944
PROX_MALL -5.585e-01 2.181e+00 -0.256 0.797946
PROX_SPMK -7.977e+01 2.394e+01 -3.332 0.000883 ***
KIND_350 2.767e+03 2.951e+03 0.937 0.348658
CHILD_350 1.929e+03 1.365e+03 1.414 0.157652
BUS_350 2.716e+03 1.095e+03 2.480 0.013256 *
PRI_1K -2.430e+03 2.274e+03 -1.069 0.285387
---Significance stars
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 111800 on 1476 degrees of freedom
Multiple R-squared: 0.4257
Adjusted R-squared: 0.4202
F-statistic: 78.14 on 14 and 1476 DF, p-value: < 2.2e-16
***Extra Diagnostic information
Residual sum of squares: 1.843321e+13
Sigma(hat): 111263.6
AIC: 38911.09
AICc: 38911.46
BIC: 37621.92
***********************************************************************
* Results of Geographically Weighted Regression *
***********************************************************************
*********************Model calibration information*********************
Kernel function: gaussian
Adaptive bandwidth: 43 (number of nearest neighbours)
Regression points: the same locations as observations are used.
Distance metric: Euclidean distance metric is used.
****************Summary of GWR coefficient estimates:******************
Min. 1st Qu. Median 3rd Qu.
Intercept -1.1458e+08 9.0584e+05 2.3476e+06 4.1578e+06
floor_area_sqm -1.0095e+04 -2.3189e+03 1.5025e+03 4.5092e+03
storey_range_bin 2.6762e+03 4.7944e+03 6.1521e+03 7.1848e+03
remaining_lease_yr -4.5799e+04 -3.5507e+04 -3.3273e+04 -2.3970e+04
PROX_CBD -2.0179e+04 -1.0879e+02 9.1102e+00 1.1889e+02
PROX_ELDER -5.1573e+03 -1.1531e+02 2.6472e+01 1.4697e+02
PROX_HAWKER -1.2341e+04 -1.1553e+02 -2.0993e+01 1.1991e+02
PROX_MRT -2.5100e+03 -1.3520e+02 -6.8808e+01 1.2412e+02
PROX_PARK -5.8389e+04 -6.8139e+01 2.0343e+01 1.2977e+02
PROX_MALL -3.3652e+03 -1.2091e+02 -7.7713e+00 9.2365e+01
PROX_SPMK -1.0006e+03 -2.0047e+02 -2.8545e+00 2.3504e+02
KIND_350 -4.1686e+05 -2.2452e+04 1.2304e+03 3.6768e+04
CHILD_350 -1.9868e+05 -1.9718e+04 1.7527e+03 1.7135e+04
BUS_350 -3.5090e+05 -1.0039e+04 1.3131e+03 1.3964e+04
PRI_1K -5.1887e+05 -2.2561e+04 -7.6044e+03 2.3571e+04
Max.
Intercept 283712604.5
floor_area_sqm 24310.2
storey_range_bin 12895.2
remaining_lease_yr -2092.9
PROX_CBD 8990.9
PROX_ELDER 14206.5
PROX_HAWKER 8075.1
PROX_MRT 13287.7
PROX_PARK 6248.9
PROX_MALL 10097.8
PROX_SPMK 18661.3
KIND_350 459017.4
CHILD_350 169685.7
BUS_350 117864.2
PRI_1K 676899.1
************************Diagnostic information*************************
Number of data points: 1491
Effective number of parameters (2trace(S) - trace(S'S)): 230.0504
Effective degrees of freedom (n-2trace(S) + trace(S'S)): 1260.95
AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 37644.84
AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 37382.17
BIC (GWR book, Fotheringham, et al. 2002,GWR p. 61, eq. 2.34): 37143.84
Residual sum of squares: 5.911935e+12
R-square value: 0.815799
Adjusted R-square value: 0.7821663
***********************************************************************
Program stops at: 2024-11-07 23:15:41.014313
From the results we can see that proximity to mall, supermarket and the numbers of bus stops, kindergartens and childcares within 350m all have a p-value more than 5%.
Computer Test Data Adaptive Bandwidth
test_data <- testing(resale_split)We also need to check for overlap in the test data
overlap_matrix <- st_equals(test_data$geometry)
overlap_matrix <- sapply(overlap_matrix, function(x) length(x) > 1)
any_overlap <- any(overlap_matrix)
any_overlap[1] FALSE
test_data_sp <- as_Spatial(test_data)gwr_bw_test_adaptive <- bw.gwr(resale_price ~ floor_area_sqm +
storey_range_bin + remaining_lease_yr +
PROX_CBD + PROX_ELDER + PROX_HAWKER +
PROX_MRT + PROX_PARK + PROX_MALL +
PROX_SPMK + KIND_350 +
CHILD_350 + BUS_350 +
PRI_1K,
data=test_data_sp,
approach="CV",
kernel="gaussian",
adaptive=TRUE,
longlat=FALSE)Adaptive bandwidth: 503 CV score: 1.09136e+13
Adaptive bandwidth: 319 CV score: 1.028795e+13
Adaptive bandwidth: 203 CV score: 9.493561e+12
Adaptive bandwidth: 134 CV score: 8.875089e+12
Adaptive bandwidth: 88 CV score: 7.970771e+12
Adaptive bandwidth: 63 CV score: 7.208276e+12
Adaptive bandwidth: 44 CV score: 6.50399e+12
Adaptive bandwidth: 36 CV score: 6.046744e+12
Adaptive bandwidth: 27 CV score: 5.781407e+12
Adaptive bandwidth: 25 CV score: 5.905641e+12
Adaptive bandwidth: 31 CV score: 6.022358e+12
Adaptive bandwidth: 27 CV score: 5.781407e+12
Now we can run prediction on the test dataset
Running Predictions on the test data
st_crs(train_data_sp)Coordinate Reference System:
User input: SVY21 / Singapore TM
wkt:
PROJCRS["SVY21 / Singapore TM",
BASEGEOGCRS["SVY21",
DATUM["SVY21",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4757]],
CONVERSION["Singapore Transverse Mercator",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["northing (N)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (E)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Cadastre, engineering survey, topographic mapping."],
AREA["Singapore - onshore and offshore."],
BBOX[1.13,103.59,1.47,104.07]],
ID["EPSG",3414]]
gwr_pred <- gwr.predict(formula = resale_price ~ floor_area_sqm +
storey_range_bin + remaining_lease_yr +
PROX_CBD + PROX_ELDER + PROX_HAWKER +
PROX_MRT + PROX_PARK + PROX_MALL +
PROX_SPMK + KIND_350 +
CHILD_350 + BUS_350 +
PRI_1K,
data= train_data_sp,
predictdata = test_data_sp,
bw=bw_adaptive,
kernel = 'gaussian',
adaptive= TRUE,
longlat = FALSE)Finding the Errors and Plotting Residuals
RMSE
Now we want to see how effective is our model by computing the residuals
Gwr_pred_df <- as.data.frame(gwr_pred$SDF)
test_data_gwr_pred <- cbind(test_data,
Gwr_pred_df)First we can compare the RMSE.
rmse(test_data_gwr_pred$resale_price,
test_data_gwr_pred$prediction)[1] 73529.59
ggplot(data = test_data_gwr_pred,
aes(x = prediction,
y = resale_price)) +
geom_point()
Plot Residuals
We can also plot this out on the map to see the residuals by location
test_data_gwr_pred$residuals <- test_data_gwr_pred$prediction - test_data_gwr_pred$resale_price
st_crs(test_data_gwr_pred)Coordinate Reference System:
User input: EPSG:3414
wkt:
PROJCRS["SVY21 / Singapore TM",
BASEGEOGCRS["SVY21",
DATUM["SVY21",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4757]],
CONVERSION["Singapore Transverse Mercator",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["northing (N)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (E)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Cadastre, engineering survey, topographic mapping."],
AREA["Singapore - onshore and offshore."],
BBOX[1.13,103.59,1.47,104.07]],
ID["EPSG",3414]]
mpsz = st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL") %>% st_transform(3414)Reading layer `MP14_SUBZONE_WEB_PL' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
tmap_mode("view")tmap mode set to interactive viewing
tm_shape(mpsz) +
tmap_options(check.and.fix = TRUE) +
tm_polygons(alpha = 0.4) +
tm_shape(test_data_gwr_pred) +
tm_dots(col = "residuals",
alpha = 0.6,
style = "quantile")Warning: The shape mpsz is invalid (after reprojection). See sf::st_is_valid
Variable(s) "residuals" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
tmap_mode("plot")tmap mode set to plotting
Part 7 : Generating Random Forest Model
Now we can move on to creating the random forest model
coords_train <- st_coordinates(train_data)
coords_test <- st_coordinates(test_data)
train_data_nogeom <- train_data %>%
st_drop_geometry()After preparing the data, we can train the model below
Basic Random Forest
set.seed(1234)
rf <- ranger(resale_price ~ floor_area_sqm +
storey_range_bin + remaining_lease_yr +
PROX_CBD + PROX_ELDER + PROX_HAWKER +
PROX_MRT + PROX_PARK + PROX_MALL +
PROX_SPMK + KIND_350 +
CHILD_350 + BUS_350 +
PRI_1K,
data=train_data_nogeom)We can view the model output below
rfRanger result
Call:
ranger(resale_price ~ floor_area_sqm + storey_range_bin + remaining_lease_yr + PROX_CBD + PROX_ELDER + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_MALL + PROX_SPMK + KIND_350 + CHILD_350 + BUS_350 + PRI_1K, data = train_data_nogeom)
Type: Regression
Number of trees: 500
Sample size: 1491
Number of independent variables: 14
Mtry: 3
Target node size: 5
Variable importance mode: none
Splitrule: variance
OOB prediction error (MSE): 6076776558
R squared (OOB): 0.7178878
Cleaning up the data
The code now is taking up alot of space, so we nedd to clean up some of the data that we dont need for future
rm(rf)
rm(price_mlr)
rm(resale_split)
rm(gwr_adaptive)
rm(busstop_data)
rm(childcare_data)
rm(cleaned_resale_no_geom)
rm(cleaned_resale_sf)
rm(eldercare_data)
rm(foodcourt_data)
rm(kindergarten_data)
rm(mall_data)
rm(MRT_data)
rm(primarySchool_data)Geographically weighted Random Forest
set.seed(1234)
gwRF_adaptive <- grf(formula = resale_price ~ floor_area_sqm +
storey_range_bin + remaining_lease_yr +
PROX_CBD + PROX_ELDER + PROX_HAWKER +
PROX_MRT + PROX_PARK + PROX_MALL +
PROX_SPMK + KIND_350 +
CHILD_350 + BUS_350 +
PRI_1K,
dframe=train_data_nogeom,
bw=bw_adaptive,
kernel="adaptive",
coords=coords_train)
Number of Observations: 1491
Number of Independent Variables: 14
Kernel: Adaptive
Neightbours: 43
--------------- Global ML Model Summary ---------------
Ranger result
Call:
ranger(resale_price ~ floor_area_sqm + storey_range_bin + remaining_lease_yr + PROX_CBD + PROX_ELDER + PROX_HAWKER + PROX_MRT + PROX_PARK + PROX_MALL + PROX_SPMK + KIND_350 + CHILD_350 + BUS_350 + PRI_1K, data = train_data_nogeom, num.trees = 500, mtry = 4, importance = "impurity", num.threads = NULL)
Type: Regression
Number of trees: 500
Sample size: 1491
Number of independent variables: 14
Mtry: 4
Target node size: 5
Variable importance mode: impurity
Splitrule: variance
OOB prediction error (MSE): 5627227178
R squared (OOB): 0.7387579
Importance:
floor_area_sqm storey_range_bin remaining_lease_yr PROX_CBD
2.793287e+12 1.068047e+12 8.660680e+12 4.801178e+12
PROX_ELDER PROX_HAWKER PROX_MRT PROX_PARK
2.360187e+12 1.994787e+12 1.671799e+12 2.100767e+12
PROX_MALL PROX_SPMK KIND_350 CHILD_350
1.793411e+12 1.489033e+12 3.635287e+11 5.816626e+11
BUS_350 PRI_1K
9.485485e+11 4.183647e+11
Mean Square Error (Not OOB): 1146448182.4
R-squared (Not OOB) %: 94.674
AIC (Not OOB): 31132.162
AICc (Not OOB): 31132.488
--------------- Local Model Summary ---------------
Residuals OOB:
Min. 1st Qu. Median Mean 3rd Qu. Max.
-372000 -56334 -9537 -2398 49237 403343
Residuals Predicted (Not OOB):
Min. 1st Qu. Median Mean 3rd Qu. Max.
-93594.4 -9884.9 -1070.1 -227.3 7599.4 101693.6
Local Variable Importance:
Min Max Mean StD
floor_area_sqm 0 240358157242 21255182577 29894445011
storey_range_bin 3174473839 172467739028 25622892610 18255491834
remaining_lease_yr 22144626538 570920092816 169240859489 79901514551
PROX_CBD 7964273306 172808392947 34904731676 20087616625
PROX_ELDER 7834493574 277908912302 36905644766 23705167865
PROX_HAWKER 8172424476 223407220808 35510866772 21314093614
PROX_MRT 3030357091 294640622299 35508977820 26447770206
PROX_PARK 8252149820 243397797587 37063390413 26751764277
PROX_MALL 3152013192 217131776176 34630971444 21064385324
PROX_SPMK 3394089223 217826354161 36277097991 24515214088
KIND_350 0 72591366615 2639133862 4330666355
CHILD_350 0 97574637270 3683478437 4962501794
BUS_350 0 176475478113 4698776105 7496074844
PRI_1K 0 115774196800 3503806515 8023521723
Mean squared error (OOB): 7275325158.639
R-squared (OOB) %: 66.202
AIC (OOB): 33887.262
AICc (OOB): 33887.587
Mean squared error Predicted (Not OOB): 339900217.208
R-squared Predicted (Not OOB) %: 98.421
AIC Predicted (Not OOB): 29319.447
AICc Predicted (Not OOB): 29319.772
Calculation time (in seconds): 4.4373
We can then save the model for future use
write_rds(gwRF_adaptive, "data/model/gwRF_adaptive.rds")We then re-read it, mostly for running purposes
gwRF_adaptive <- read_rds("data/model/gwRF_adaptive.rds")Predicting with test data
test_data_nogeom <- cbind(
test_data, coords_test) %>%
st_drop_geometry()gwRF_pred <- predict.grf(gwRF_adaptive,
test_data_nogeom,
x.var.name="X",
y.var.name="Y",
local.w=1,
global.w=0)GRF_pred_df <- as.data.frame(gwRF_pred)
test_data_pred <- cbind(test_data,
GRF_pred_df)Save the data for the future
write_rds(test_data_pred, "data/test_results.rds")test_data_pred <- read_rds( "data/test_results.rds")Freeing Memory
The model is very large >15Gb so once we got th results, we should free up the memory
rm(gwRF_adaptive)Viewing Random Forest Prediction Error
Now we can compare the difference in values from predictions vs actual resale value by location
rmse(test_data_pred$resale_price,
test_data_pred$gwRF_pred)[1] 83170.34
ggplot(data = test_data_pred,
aes(x = gwRF_pred,
y = resale_price)) +
geom_point()
Show residuals
test_data_pred$residuals <- test_data_pred$gwRF_pred - test_data_pred$resale_price
st_crs(test_data_pred)Coordinate Reference System:
User input: EPSG:3414
wkt:
PROJCRS["SVY21 / Singapore TM",
BASEGEOGCRS["SVY21",
DATUM["SVY21",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4757]],
CONVERSION["Singapore Transverse Mercator",
METHOD["Transverse Mercator",
ID["EPSG",9807]],
PARAMETER["Latitude of natural origin",1.36666666666667,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8801]],
PARAMETER["Longitude of natural origin",103.833333333333,
ANGLEUNIT["degree",0.0174532925199433],
ID["EPSG",8802]],
PARAMETER["Scale factor at natural origin",1,
SCALEUNIT["unity",1],
ID["EPSG",8805]],
PARAMETER["False easting",28001.642,
LENGTHUNIT["metre",1],
ID["EPSG",8806]],
PARAMETER["False northing",38744.572,
LENGTHUNIT["metre",1],
ID["EPSG",8807]]],
CS[Cartesian,2],
AXIS["northing (N)",north,
ORDER[1],
LENGTHUNIT["metre",1]],
AXIS["easting (E)",east,
ORDER[2],
LENGTHUNIT["metre",1]],
USAGE[
SCOPE["Cadastre, engineering survey, topographic mapping."],
AREA["Singapore - onshore and offshore."],
BBOX[1.13,103.59,1.47,104.07]],
ID["EPSG",3414]]
# Load in the mpsz data
mpsz = st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL") %>% st_transform(3414)Reading layer `MP14_SUBZONE_WEB_PL' from data source
`C:\Users\Admin\Desktop\SMU\ISSS626\ISSS626-KierenChua\TakeHomeEx\TakeHomeEx03\data\geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
Plot the map
tmap_mode("view")tmap mode set to interactive viewing
tm_shape(mpsz) +
tmap_options(check.and.fix = TRUE) +
tm_polygons(alpha = 0.4) +
tm_shape(test_data_pred) +
tm_dots(col = "residuals",
alpha = 0.6,
style = "quantile")Warning: The shape mpsz is invalid (after reprojection). See sf::st_is_valid
Variable(s) "residuals" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
tmap_mode("plot")tmap mode set to plotting